add INPUT-REQUIRED
authorJoey Hess <joeyh@joeyh.name>
Tue, 11 Mar 2025 15:46:31 +0000 (11:46 -0400)
committerJoey Hess <joeyh@joeyh.name>
Tue, 11 Mar 2025 15:46:31 +0000 (11:46 -0400)
Used by git-annex-compute-singularity to make addcomputed --fast work.

Also, simplified git-annex-compute-singularity; there is no need to hard
link the container into place. singularity does not care about the
extension of the container, so can just pass it the annex object file.

Command/AddComputed.hs
Command/Recompute.hs
Remote/Compute.hs
doc/design/compute_special_remote_interface.mdwn
doc/special_remotes/compute/git-annex-compute-singularity
doc/todo/compute_special_remote_remaining_todos.mdwn

index 4774caae9b7a7320d8f98104df2feb893b8e6e2e..50c0ee28f6e2b791cba0bced77640954c569c020 100644 (file)
@@ -206,14 +206,14 @@ addComputed maddaction stagefiles r reproducibleconfig choosebackend destfile fa
                Just v -> isReproducible v
                Nothing -> Remote.Compute.computeReproducible result
        
-getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))
-getInputContent fast p = catKeyFile p >>= \case
-       Just inputkey -> getInputContent' fast inputkey filedesc
+getInputContent :: Bool -> OsPath -> Bool -> Annex (Key, Maybe (Either Git.Sha OsPath))
+getInputContent fast p required = catKeyFile p >>= \case
+       Just inputkey -> getInputContent' fast inputkey required filedesc
        Nothing -> inRepo (Git.fileRef p) >>= \case
                Just fileref -> catObjectMetaData fileref >>= \case
                        Just (sha, _, t)
                                | t == Git.BlobObject ->
-                                       getInputContent' fast (gitShaKey sha) filedesc
+                                       getInputContent' fast (gitShaKey sha) required filedesc
                                | otherwise ->
                                        badinput $ ", not a git " ++ decodeBS (Git.fmtObjectType t)
                        Nothing -> notcheckedin
@@ -223,9 +223,9 @@ getInputContent fast p = catKeyFile p >>= \case
        badinput s = giveup $ "The computation needs an input file " ++ s ++ ": " ++ fromOsPath p
        notcheckedin = badinput "that is not checked into the git repository"
 
-getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
-getInputContent' fast inputkey filedesc
-       | fast = return (inputkey, Nothing)
+getInputContent' :: Bool -> Key -> Bool -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
+getInputContent' fast inputkey required filedesc
+       | fast && not required = return (inputkey, Nothing)
        | otherwise = case keyGitSha inputkey of
                Nothing -> ifM (inAnnex inputkey)
                        ( do
index 6b21ce8ee7a5fc1b0dd0554bfbed1bc9da1e1c92..b85f5d449d08a9915b12f06f2466ec1d68346e75 100644 (file)
@@ -152,14 +152,14 @@ perform o r file origkey origstate = do
                check "not outputting"
                        Remote.Compute.computeOutputs origstate state
 
-       getinputcontent program p
+       getinputcontent program p required
                | originalOption o =
                        case M.lookup p (Remote.Compute.computeInputs origstate) of
-                               Just inputkey -> getInputContent' False inputkey
+                               Just inputkey -> getInputContent' False inputkey required
                                        (fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
                                Nothing -> Remote.Compute.computationBehaviorChangeError program
                                        "requesting a new input file" p
-               | otherwise = getInputContent False p
+               | otherwise = getInputContent False p required
        
        destfile outputfile
                | Just outputfile == origfile = Just file
index 7d21ddccdbce6f8f53ad0b5922f358cd90035128..3adec4bc5b65e19cda07128b3cbaaafc7f80df37 100644 (file)
@@ -201,17 +201,19 @@ programField = Accepted "program"
 data ProcessCommand
        = ProcessInput FilePath
        | ProcessOutput FilePath
+       | ProcessProgress PercentFloat
        | ProcessReproducible
        | ProcessSandbox
-       | ProcessProgress PercentFloat
+       | ProcessInputRequired FilePath
        deriving (Show, Eq)
 
 instance Proto.Receivable ProcessCommand where
        parseCommand "INPUT" = Proto.parse1 ProcessInput
        parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
+       parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
        parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
        parseCommand "SANDBOX" = Proto.parse0 ProcessSandbox
-       parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
+       parseCommand "INPUT-REQUIRED" = Proto.parse1 ProcessInputRequired
        parseCommand _ = Proto.parseFail
 
 newtype PercentFloat = PercentFloat Float
@@ -392,9 +394,10 @@ runComputeProgram
        :: ComputeProgram
        -> ComputeState
        -> ImmutableState
-       -> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
-       -- ^ get input file's content, or Nothing the input file's
-       -- content is not available
+       -> (OsPath -> Bool -> Annex (Key, Maybe (Either Git.Sha OsPath)))
+       -- ^ Get input file's content, or Nothing the input file's
+       -- content is not available. True is passed when the input content
+       -- is required even for addcomputed --fast.
        -> Maybe (Key, MeterUpdate)
        -- ^ update meter for this key
        -> (ComputeProgramResult -> OsPath -> NominalDiffTime -> Annex v)
@@ -454,37 +457,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
                liftIO $ hFlush (stdinHandle p)
 
        parseoutput p tmpdir subdir result meterfile l = case Proto.parseMessage l of
-               Just (ProcessInput f) -> do
-                       let f' = toOsPath f
-                       let knowninput = M.member f'
-                               (computeInputs (computeState result))
-                       checksafefile tmpdir subdir f' "input"
-                       checkimmutable knowninput "inputting" f' $ do
-                               (k, inputcontent) <- getinputcontent f'
-                               let mkrel a = Just <$> 
-                                       (a >>= liftIO . relPathDirToFile subdir)
-                               mp <- case inputcontent of
-                                       Nothing -> pure Nothing
-                                       Just (Right obj)
-                                               | computeSandbox result -> 
-                                                       mkrel $ populatesandbox obj tmpdir
-                                               | otherwise ->
-                                                       mkrel $ pure obj
-                                       Just (Left gitsha) -> 
-                                               mkrel $ populategitsha gitsha tmpdir
-                               sendresponse p $
-                                       maybe "" fromOsPath mp
-                               let result' = result
-                                       { computeInputsUnavailable = 
-                                               isNothing mp || computeInputsUnavailable result
-                                       }
-                               return $ if immutablestate
-                                       then result'
-                                       else modresultstate result' $ \s -> s
-                                               { computeInputs = 
-                                                       M.insert f' k
-                                                               (computeInputs s)
-                                               }
+               Just (ProcessInput f) -> handleinput f False p tmpdir subdir result
+               Just (ProcessInputRequired f) -> handleinput f True p tmpdir subdir result
                Just (ProcessOutput f) -> do
                        let f' = toOsPath f
                        checksafefile tmpdir subdir f' "output"
@@ -525,6 +499,38 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
                Nothing -> giveup $
                        program ++ " output an unparseable line: \"" ++ l ++ "\""
 
+       handleinput f required p tmpdir subdir result = do
+               let f' = toOsPath f
+               let knowninput = M.member f'
+                       (computeInputs (computeState result))
+               checksafefile tmpdir subdir f' "input"
+               checkimmutable knowninput "inputting" f' $ do
+                       (k, inputcontent) <- getinputcontent f' required
+                       let mkrel a = Just <$> 
+                               (a >>= liftIO . relPathDirToFile subdir)
+                       mp <- case inputcontent of
+                               Nothing -> pure Nothing
+                               Just (Right obj)
+                                       | computeSandbox result -> 
+                                               mkrel $ populatesandbox obj tmpdir
+                                       | otherwise ->
+                                               mkrel $ pure obj
+                               Just (Left gitsha) -> 
+                                       mkrel $ populategitsha gitsha tmpdir
+                       sendresponse p $
+                               maybe "" fromOsPath mp
+                       let result' = result
+                               { computeInputsUnavailable = 
+                                       isNothing mp || computeInputsUnavailable result
+                               }
+                       return $ if immutablestate
+                               then result'
+                               else modresultstate result' $ \s -> s
+                                       { computeInputs = 
+                                               M.insert f' k
+                                                       (computeInputs s)
+                                       }
+
        modresultstate result f = 
                result { computeState = f (computeState result) }
 
@@ -630,7 +636,7 @@ computeKey rs (ComputeProgram program) k _af dest meterupdate vc =
                        (Just (k, p))
                        (postcompute keyfile)
 
-       getinputcontent state f =
+       getinputcontent state f _required =
                case M.lookup f (computeInputs state) of
                        Just inputkey -> case keyGitSha inputkey of
                                Nothing -> 
index 56bb90f14f1df4f9d3b89dd87988d4c44587dbd1..f286a0b7cd4dadcee0613d090417248a5823c400 100644 (file)
@@ -73,12 +73,13 @@ If an input file is not available, the program's stdin will be closed
 without a path being written to it. So when reading from stdin fails, 
 the program should exit.
 
-When `git-annex addcomputed --fast` is being used to add a computation
-to the git-annex repository without actually performing it, the 
-response to eaach `INPUT` will be an empty line rather than the path to 
-an input file. In that case, the program should proceed with the rest of
-its output to stdout (eg `OUTPUT` and `REPRODUCIBLE`), but should not
-perform any computation.
+When `git-annex addcomputed --fast` is being used to add a computation to
+the git-annex repository without actually performing it, the response to
+each `INPUT` will be an empty line rather than the path to an input file.
+This can also happen when an input file is not available for whatever
+reason. In this case, the program should proceed with the rest of its
+output to stdout (eg `OUTPUT` and `REPRODUCIBLE`), but should not perform
+any computation.
 
 ## output files
 
@@ -133,6 +134,16 @@ messages that the program can output. All of these are optional.
   that is done by making hard links, but it will fall back to copying annexed
   files if the filesystem does not support hard links.
 
+* `INPUT-REQUIRED`
+
+  This works the same as `INPUT`, except when `git-annex addcomputed --fast`
+  is being used to add a computation to the git-annex repository without
+  actually performing it, the input file will be provided as a response
+  to this, rather than the empty line provided as a response to `INPUT`.
+  
+  If the input file is not available for some reason, an empty line will
+  still be provided as a response to this.
+
 ## example
 
 An example `git-annex-compute-foo` shell script follows:
index d296e0162da43dc7b2cc5aa9e39181153e60c775..53b3a62cf5b9ea26ac2c08c803fe070fe6ad7cf6 100755 (executable)
@@ -78,17 +78,13 @@ else
        echo "SANDBOX"
        read pathtotop
        binddir="$(realpath "$pathtotop")"
-       echo "INPUT $pathtotop/$ANNEX_COMPUTE_passthrough"
-       read input
-       if [ -n "$input" ]; then
-               container="./$ANNEX_COMPUTE_passthrough"
-               mkdir -p "$(dirname "$container")"
-               ln "$(realpath "$input")" "$container"
-       else
-               echo "Unfortunately, addcomputed --fast cannot be used with git-annex-compute-singularity --passthrough=" >&2
+       echo "INPUT-REQUIRED $pathtotop/$ANNEX_COMPUTE_passthrough"
+       read container
+       if [ -z "$container" ]; then
+               echo "Cannot proceed without container $ANNEX_COMPUTE_passthrough" >&2
                exit 1
        fi
        # stdio is passed through to the git-annex-compute- command inside
-       # singularity
+       # the container
        run_singularity "$@" 2> >( strip_escape 1>&2 )
 fi
index f8917aded8182912cb3adba708538363e0c06753..c13a4e64255ab9cfb033ef2128aa7c4ff9c2ae30 100644 (file)
@@ -1,13 +1,17 @@
 This is the remainder of my todo list while I was building the
 compute special remote. --[[Joey]]
 
-* git-annex-compute-singularity with passthrough= cannot be used
-  by `git-annex addcomputed --fast` because the singularity image is not
-  available to run. Maybe make a varity of INPUT that is provided also
-  in --fast mode to solve this?
+* git-annex responds to each INPUT immediately, and flushes stdout.
+  This could cause problems if the program is sending several INPUT
+  first, before reading responses, as is documented it should do to allow
+  for parallel get of the input files.
 
 * write a tip showing how to use this
 
+* Support parallel get of input files. The design allows for this,
+  but how much parallelism makes sense? Would it be possible to use the
+  usual worker pool?
+
 * Write some simple compute programs so we have something to start with.
 
   - convert between images eg jpeg to png